import jieba
from variant import *

# Add domain-specific terms to jieba's dictionary before any text is segmented.
# NOTE(review): "牛毽子肉" differs from the "牛腱子肉" spelling used in
# labels_map — confirm which spelling the training texts actually use.
for _custom_word in (
    "八宝鸭",
    "牛毽子肉",
    "帝王蟹",
    "红膏炝蟹",
    "红膏蟹",
    "下水管道",
    "卤牛肉",
):
    jieba.add_word(_custom_word)

# Raw labels: "__label__00" through "__label__15", in ascending order.
labels = [f"__label__{index:02d}" for index in range(16)]

# Mapped labels: each raw "__label__NN" key maps to a "__label__<name>" value
# carrying a descriptive name. The position of a name in the tuple below is
# the NN of its raw label.
labels_map = {
    f"__label__{index:02d}": f"__label__{name}"
    for index, name in enumerate((
        "一周计划",    # 00
        "牛腱子肉",    # 01
        "周一周三",    # 02
        "周二周五",    # 03
        "周六",        # 04
        "红膏炝蟹",    # 05
        "八宝鸭",      # 06
        "开始做饭",    # 07
        "设定闹钟",    # 08
        "垃圾处理器",  # 09
        "下水管疏通",  # 10
        "天气",        # 11
        "下水管除菌",  # 12
        "卤牛肉",      # 13
        "出水",        # 14
        "水温",        # 15
    ))
}

# Prepare the training data: one line per sample, written to
# training_data.txt as "<raw label> <space-separated tokens>".
# NOTE(review): the raw "__label__NN" label is what gets written here;
# labels_map is not applied. An earlier comment claimed the mapped label was
# used — confirm which form the downstream consumer expects.
with open("training_data.txt", "w", encoding="utf-8") as f:
    # a1..a16 are presumably provided by `from variant import *`; each group
    # of texts is paired with the label at the same position.
    for label, texts in zip(labels, [a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16]):
        print(label)  # progress output: one line per label group
        for text in texts:
            # Segment with cut_all=False and join the tokens with single spaces.
            segmented_text = " ".join(jieba.cut(text, cut_all=False))
            f.write(f"{label} {segmented_text}\n")
